/* Open N instances of Stata to speed up processing
(N is passed as the first argument of this do-file);
see call_length.do for further comments.
 
Calculation with an intra-translation-constant sample size in words,

where N is kept constant, 
but the word order is randomized per book instead of per verse.
*/
/* remember the working directory; it is used further down to build
the paths handed to the worker instances */
local dir `c(pwd)'

/* the number of Stata instances is passed as the first argument;
stop right away if it is missing or not a number */
confirm number `1'

use entropy_bible, clear


drop if chars_original<10000
/* this excludes two translations 
(xbi-x-bible-wampukuamp, xbi-x-bible-yanimoi) 
because the book "Revelation" is not complete
in both translations */ 

/* restrict the data to books 40-44 and 66 */
keep if inlist(book,40,41,42,43,44,66)


/* drop translations with fewer than six remaining observations */
bysort trans: gen count=_N
drop if count<6

sort trans

/* minimum sample size in words */

bysort trans: egen minimum=min(words_original)

/* assign the observations at random to as many equally sized groups
as instances were requested */
gen random=runiform()

egen group = cut(random), group(`1') label

/* shift the group codes up by one so that the smallest code is 1
rather than 0 (presumably cut() starts numbering at 0 - see the egen
documentation) */
replace group=group+1	
sum group
local min=r(min)
local max=r(max)

/* variable name written out in full: the abbreviation "min" only works
while variable abbreviation is switched on and unambiguous */
keep trans minimum group book


/* Hand every group to its own Stata instance: write the observations of
the group to calculation_<g>.dta and start a new Stata process that runs
calc_entropy_constant_fullshuffle with the group number as argument. */
forvalues g=`min'/`max' {
	preserve
	keep if group==`g'
	save "`dir'\\calculation_`g'", replace
	/* remove a completion marker left behind by an earlier, interrupted
	run; the wait loop further down checks for this file and would
	otherwise treat the instance as finished before it has even started */
	capture erase finished_`g'.dta
	winexec C:\Program Files (x86)\Stata14\StataMP-64 -q do `dir'\\calc_entropy_constant_fullshuffle `g'
	restore
}

/* Block until all instances are done: for each instance number, check
every two seconds whether its marker file finished_<g>.dta exists. */
clear
forvalues g=1/`1' {
	local done 0
	while !`done' {
		capture confirm file finished_`g'.dta
		if _rc == 0 {
			local done 1
		}
		else {
			sleep 2000
		}
	}
}
/* Stack the result files of all instances into one dataset and remove
the temporary files. */
use  entropy_bible_constant_1, clear 
/* keep only the variable layout of the first result file as an empty
template; unlike "drop in 1/l" this also works if that file contains
no observations */
keep if 0
forvalues g=1/`1' {
	capture append using entropy_bible_constant_`g'
	local rc = _rc
	if `rc' {
		/* report a result file that could not be appended instead of
		silently losing that part of the data, and leave it on disk */
		display as error "could not append entropy_bible_constant_`g'.dta (rc=`rc')"
	}
	else {
		capture erase entropy_bible_constant_`g'.dta
	}
	capture erase calculation_`g'.dta
	capture erase finished_`g'.dta
}
/* discard rows without a book number */
drop if book==.

/* Derive the D values.
For each of the three text versions, H is the inverse of the ratio
entropy / chars. */
foreach version in original order structure {
	generate H_`version' = (entropy_`version'/chars_`version')^(-1)
}

/* rows with a missing H_original are of no use below */
keep if H_original != .

/* D is the difference between H of a version and H of the original */
gen D_structure = H_structure - H_original
gen D_order = H_order - H_original

/* show every row in which the word counts of the three versions are
not all identical */
list if !(words_original==words_order & words_original==words_structure)
save entropy_bible_constant_fullshuffle, replace

exit